In [81]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.sparse import csr_matrix
import helper
# Read movies.csv from the ml-latest-small directory into a DataFrame,
# then preview its leading rows as this cell's output.
movies = pd.read_csv(filepath_or_buffer='ml-latest-small/movies.csv')
movies.head(n=5)
Out[81]:
| | userId | movieId | rating | timestamp |
|---|---|---|---|---|
| 0 | 1 | 31 | 2.5 | 1260759144 |
| 1 | 1 | 1029 | 3.0 | 1260759179 |
| 2 | 1 | 1061 | 3.0 | 1260759182 |
| 3 | 1 | 1129 | 2.0 | 1260759185 |
| 4 | 1 | 1172 | 4.0 | 1260759205 |
| | avg_romance_rating | avg_scifi_rating |
|---|---|---|
| userId | ||
| 1 | 3.50 | 2.40 |
| 2 | 3.59 | 3.80 |
| 3 | 3.65 | 3.14 |
| 4 | 4.50 | 4.26 |
| 5 | 4.08 | 4.00 |
| | userId | avg_romance_rating | avg_scifi_rating |
|---|---|---|---|
| 0 | 1 | 3.50 | 2.40 |
| 1 | 3 | 3.65 | 3.14 |
| 2 | 6 | 2.90 | 2.75 |
| 3 | 7 | 2.93 | 3.36 |
| 4 | 12 | 2.89 | 2.62 |
| | userId | avg_romance_rating | avg_scifi_rating | avg_action_rating |
|---|---|---|---|---|
| 0 | 1 | 3.50 | 2.40 | 2.80 |
| 1 | 3 | 3.65 | 3.14 | 3.47 |
| 2 | 6 | 2.90 | 2.75 | 3.27 |
| 3 | 7 | 2.93 | 3.36 | 3.29 |
| 4 | 12 | 2.89 | 2.62 | 3.21 |
| title | "Great Performances" Cats (1998) | $9.99 (2008) | 'Hellboy': The Seeds of Creation (2004) | 'Neath the Arizona Skies (1934) | 'Round Midnight (1986) | 'Salem's Lot (2004) | 'Til There Was You (1997) | 'burbs, The (1989) | 'night Mother (1986) | (500) Days of Summer (2009) |
|---|---|---|---|---|---|---|---|---|---|---|
| userId | ||||||||||
| 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 5 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 6 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | NaN | NaN |
| title | Forrest Gump (1994) | Pulp Fiction (1994) | Shawshank Redemption, The (1994) | Silence of the Lambs, The (1991) | Star Wars: Episode IV - A New Hope (1977) | Jurassic Park (1993) | Matrix, The (1999) | Toy Story (1995) | Schindler's List (1993) | Terminator 2: Judgment Day (1991) | ... | Dances with Wolves (1990) | Fight Club (1999) | Usual Suspects, The (1995) | Seven (a.k.a. Se7en) (1995) | Lion King, The (1994) | Godfather, The (1972) | Lord of the Rings: The Fellowship of the Ring, The (2001) | Apollo 13 (1995) | True Lies (1994) | Twelve Monkeys (a.k.a. 12 Monkeys) (1995) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 29 | 5.0 | 5.0 | 5.0 | 4.0 | 4.0 | 4.0 | 3.0 | 4.0 | 5.0 | 4.0 | ... | 5.0 | 4.0 | 5.0 | 4.0 | 3.0 | 5.0 | 3.0 | 5.0 | 4.0 | 2.0 |
| 508 | 4.0 | 5.0 | 4.0 | 4.0 | 5.0 | 3.0 | 4.5 | 3.0 | 5.0 | 2.0 | ... | 5.0 | 4.0 | 5.0 | 4.0 | 3.5 | 5.0 | 4.5 | 3.0 | 2.0 | 4.0 |
| 14 | 1.0 | 5.0 | 2.0 | 5.0 | 5.0 | 3.0 | 5.0 | 2.0 | 4.0 | 4.0 | ... | 3.0 | 5.0 | 5.0 | 5.0 | 4.0 | 5.0 | 5.0 | 3.0 | 4.0 | 4.0 |
| 72 | 5.0 | 5.0 | 5.0 | 4.5 | 4.5 | 4.0 | 4.5 | 5.0 | 5.0 | 3.0 | ... | 4.5 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.5 | 3.0 | 5.0 |
| 653 | 4.0 | 5.0 | 5.0 | 4.5 | 5.0 | 4.5 | 5.0 | 5.0 | 5.0 | 5.0 | ... | 4.5 | 5.0 | 5.0 | 4.5 | 5.0 | 4.5 | 5.0 | 5.0 | 4.0 | 5.0 |
5 rows × 30 columns
| | Independence Day (a.k.a. ID4) (1996) | Mission: Impossible (1996) | Twister (1996) | Twelve Monkeys (a.k.a. 12 Monkeys) (1995) | Willy Wonka & the Chocolate Factory (1971) | Rock, The (1996) | Executive Decision (1996) | Fargo (1996) | Toy Story (1995) | Birdcage, The (1996) | ... | Midnight Cowboy (1969) | Thomas Crown Affair, The (1999) | Fried Green Tomatoes (1991) | Lethal Weapon 3 (1992) | Remember the Titans (2000) | Flintstones, The (1994) | What Lies Beneath (2000) | Donnie Brasco (1997) | Insomnia (2002) | The Hunger Games (2012) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 24 | 5 | 2 | 4 | 1 | 4 | 4 | 4 | 4 | 5 | ... | |||||||||||
| 30 | 3 | 4 | 4 | 4 | 5 | 5 | ... | ||||||||||||||
| 4 | 4 | 3 | 4 | 4 | 5 | 5 | 3 | 5 | 5 | 5 | ... | ||||||||||
| 22 | 5 | 3 | 5 | 3 | 4 | 1 | 3 | 3 | 3 | ... | |||||||||||
| 23 | 4 | 4 | 3 | 5 | 3 | 3 | 4 | 5 | 5 | 3 | ... |
5 rows × 300 columns